/* 
*** Spillover effects at multi-establishment firms
	// input: estlevel_dataset, inclevel_dataset, microregions_municipality_concordance
	// output: spillovers
*/ 

* Open a fresh log for this do-file. Both commands run under -capture-, so an
* error from either one (e.g. no log is currently open) does not stop the run.
capture log close
capture log using "$logs/heterogeneity_spillovers_log", replace
	

*********************************
** CHANGES IN FIRM ENVIRONMENT **
*********************************

* data toggles (establishment-level analysis); read by the restriction,
* fixed-effect and regression steps below
* singest: 0 = no restriction; 1 = must be a single establishment firm
local singest 0
* bothFM: 0 = no restriction; 1 = must employ both men and women at baseline
local bothFM 1
* covered: 0 = no restriction; 1 = must be in the geographic coverage of the CBA
local covered 1
* signing: 0 = no restriction; 1 = must be a signing establishment
local signing 1
* geoglevel: state_mode = state-yr FEs; microregion_mode = microregion-yr FEs;
*            stateind = state-industry-yr FEs; microind = microregion-industry-yr FEs
local geoglevel "microregion_mode"
* no2011: 0 = include 2011; 1 = exclude 2011
local no2011 1
* balanced: 0 = no restriction; 1 = restrict to the balanced panel
local balanced 0

	* Load establishment sample (main spells)
	use "$files/estlevel_dataset.dta", clear

	* Spillovers: treatment is redefined at the firm level.
	* The firm id is the first 8 characters of the zero-padded 14-digit establishment id.
	gen fakeid_estab_string = string(fakeid_estab, "%014.0f")
	gen fakeid_firm = substr(fakeid_estab_string,1,8)
	* Flag firms with at least one treated establishment, drop the treated
	* establishments themselves, and mark the remaining establishments of those
	* firms as treated.
	tempvar firmtreat
	egen `firmtreat' = max(treat), by(fakeid_firm)
	drop if treat==1
	replace treat = 1 if `firmtreat'==1
	drop `firmtreat'

	* sample restrictions
	* Each restriction is applied only when its toggle is on. The bothFM, covered and
	* signing restrictions used to be hard-coded as well, which made their toggles
	* ineffective when set to 0; with the toggles at 1 the sample is unchanged.
	if `singest' {
		keep if bl_singest==1
	}
	if `bothFM' {
		keep if bl_bothFM==1
	}
	if `covered' {
		keep if (bl_covered==1)
	}
	if `signing' {
		keep if (bl_signing==1)
	}
	if `balanced' {
		* keep establishments whose exit indicator is never switched on
		tempvar everexit
		egen `everexit' = max(exit), by(fakeid_estab)
		keep if `everexit'==0
		drop `everexit'
	}

	* Number of establishments by spillover-treatment status in the restricted sample
	* (one tagged row per establishment, tagged after the restrictions)
	tempvar onerow
	egen `onerow' = tag(fakeid_estab)
	tab treat if `onerow'==1
	drop `onerow'
	
	* time varying fixed effects
	// industry: ind_mode divided by 1,000, rounded down
	generate ind2d_num = floor(ind_mode/1000)
	// geography: municipality_mode divided by 10,000, rounded down
	generate state_mode = floor(municipality_mode/10000)
	// the merge key is "municipality", so the variable is renamed for the merge and
	// renamed back afterwards; concordance rows without an establishment are discarded
	rename municipality_mode municipality
	merge m:1 municipality using "$raw/microregions_municipality_concordance.dta", keepusing(microregion) keep(master match) nogenerate
	rename (municipality microregion) (municipality_mode microregion_mode)
	// fixed-effect groups
	egen indyrFE = group(year ind2d_num)
	egen stateind = group(state_mode ind2d_num)
	egen microind = group(microregion_mode ind2d_num)
	egen geogyrFE = group(year `geoglevel')
	// when the geography level is already crossed with industry, drop the separate
	// industry-year group
	if inlist("`geoglevel'", "stateind", "microind") {
		drop indyrFE
	}

	* for clustering
	egen clustergrp = group(fakeid_estab)

	* event study var
	generate post = (year>=2015) if year!=.
	generate treatpost = treat*post
	tabulate year, generate(yearj)

	* treatment x year-dummy interactions; the labels assume that yearj1-yearj7
	* correspond to 2011-2017
	forvalues i = 1/7 {
		local yr = 2010 + `i'
		generate interj`i' = treat*yearj`i'
		label variable interj`i' "`yr'"
	}

	* the 2014 interaction is overwritten with a constant zero
	generate zero = 0
	label variable zero "2014"
	replace interj4 = zero

	* Outcome variables: shares built as numerator/denominator
	generate sh_mgmt_fem_mgmt = mgmt_fem/mgmt
	generate sh_mlext_mleav = mlext_mleav/mleav
	generate sh_mlprot_mleav = mlprot_mleav/mleav
	label variable sh_mgmt_fem_mgmt "Share of female managers (among management)"
	label variable sh_mlext_mleav "Share on extended maternity leave (among workers on maternity leave)"
	label variable sh_mlprot_mleav "Share returning after maternity leave (among workers on maternity leave)"

	* Regressions
	* xl is 3.5 when 2011 is excluded and 4.5 otherwise
	local xl = cond(`no2011', 3.5, 4.5)
	if `no2011' {
		drop if year==2011
		drop interj1
	}

	/*** Heterogeneity table ***/
	* One regression per outcome on treatpost, absorbing the establishment id and
	* every variable whose name ends in FE; results are stored as p10, p20, p30.
	local depvar sh_mgmt_fem_mgmt sh_mlext_mleav sh_mlprot_mleav
	local j = 1
	foreach var of local depvar {

		reghdfe `var' treatpost, absorb(fakeid_estab *FE) cluster(clustergrp)
		estadd scalar obs = e(N_full)
		* 2014 mean of the outcome among treated observations
		quietly summarize `var' if year == 2014 & treat == 1
		estadd scalar meandv = r(mean)
		estimates store p`j'0

		local ++j
	}

*** Export ***

	* one tab-delimited file per stored model, holding the treatpost row plus the
	* "mean depvar" and "N" statistics
	forvalues i = 1/3 {
		estout p`i'0 using "$tables/Sfirmenv_p`i'.txt", ///
			style(tab) mlabels(none) label collabels(none) ///
			cells(b(star fmt(%9.3f)) se(par)) ///
			stats(meandv obs, fmt(%9.2fc %9.0fc ) labels("mean depvar" "N" )) ///
			drop(o.*, relax) keep(treatpost, relax) replace starlevels(* 0.10 ** 0.05 *** 0.01)
	}
	

***************
** RETENTION **
***************

* data toggles (worker-level analysis); read by the restriction, fixed-effect
* and regression steps below
* singest: 0 = no restriction; 1 = worker's baseline establishment must be a single establishment firm
local singest 0
* bothFM: 0 = no restriction; 1 = worker's baseline establishment must employ both men and women at baseline
local bothFM 1
* covered: 0 = no restriction; 1 = worker's baseline establishment must be in the geographic coverage of the CBA
local covered 1
* signing: 0 = no restriction; 1 = worker's baseline establishment must be a signing establishment
local signing 1
* geoglevel: "" = no geog-yr FEs; "state_mode" = state-yr FEs; "microregion_mode" = microregion-yr FEs
local geoglevel "microregion_mode"
* no2011: 0 = include 2011; 1 = exclude 2011
local no2011 1
* restr: 0 = no restriction; 1 = worker must be 18 or older and not be on probation at baseline
local restr 0

	* load incumbent data
	use "$files/inclevel_dataset.dta", clear

	* Spillovers: treatment is redefined at the level of the worker's 2014 firm.
	* The firm id is the first 8 characters of the zero-padded 14-digit establishment id.
	gen fakeid_estab_string = string(fakeid_estab, "%014.0f")
	gen fakeid_firm = substr(fakeid_estab_string,1,8)
	tempvar firm2014 blfirm firmtreat onerow
	* numeric id of the firm observed in 2014, spread to all rows of the worker
	egen `firm2014' = group(fakeid_firm) if year==2014
	egen `blfirm' = max(`firm2014'), by(fakeid_worker)
	* Flag 2014 firms with at least one treated worker. Workers without a 2014 firm
	* are left unflagged: grouping on a missing id would pool all of them into one
	* pseudo-firm and let a single treated worker switch the whole pool to treated.
	egen `firmtreat' = max(treatwork) if !missing(`blfirm'), by(`blfirm')
	drop if treatwork==1
	replace treatwork = 1 if `firmtreat'==1
	drop `firm2014' `blfirm' `firmtreat'

	* Number of workers by spillover-treatment status before the sample restrictions
	egen `onerow' = tag(fakeid_worker)
	tab treatwork if `onerow'==1
	drop `onerow'

	* sample restrictions
	* Each restriction is applied only when its toggle is on. The bothFM, covered and
	* signing restrictions used to be hard-coded as well, which made their toggles
	* ineffective when set to 0; with the toggles at 1 the sample is unchanged.
	if `singest' {
		keep if singestwork==1
	}
	if `bothFM' {
		keep if bothFMwork==1
	}
	if `covered' {
		keep if coverwork==1
	}
	if `signing' {
		keep if signwork==1
	}
	if `restr' {
		keep if blage>=18
		gen prob=(blten<=3)
		drop if prob==1
	}

	* Number of workers by spillover-treatment status in the restricted sample
	* (one tagged row per worker, tagged after the restrictions)
	egen `onerow' = tag(fakeid_worker)
	tab treatwork if `onerow'==1
	drop `onerow'

	* time varying fixed effects
	// industry: blind divided by 1,000 and by 100, rounded down
	generate ind2d_num = floor(blind/1000)
	generate ind3d_num = floor(blind/100)
	egen indyrFE = group(year ind2d_num)
	// geography: rename so that the names match the values `geoglevel' can take
	rename (blstate blmr) (state_mode microregion_mode)
	egen geogyrFE = group(year `geoglevel')
	// tenure: blten in bins of width 3
	generate ten3m = floor(blten/3)
	egen tenyrFE = group(year ten3m)

	* for clustering
	generate clustergrp = blempl

	* event study var
	generate post = (year>=2015) if year!=.
	generate treatpost = treatwork*post
	generate postgender = post*gender
	generate treatgender = treatwork*gender

	tabulate year, generate(yearj)

	* treatment x year-dummy interactions; the labels assume that yearj1-yearj7
	* correspond to 2011-2017
	forvalues i = 1/7 {
		local yr = 2010 + `i'
		generate interj`i' = treatwork*yearj`i'
		label variable interj`i' "`yr'"
	}

	* the 2014 interaction is overwritten with a constant zero
	generate zero = 0
	label variable zero "2014"
	replace interj4 = zero

	* Inverse of employment: for weighting regressions
	generate blfeminv = 1/blfem
	generate blmalinv = 1/blmal

	* blinv takes the female inverse when gender==1, the male inverse when
	* gender==0, and is missing otherwise
	generate blinv = cond(gender==1, blfeminv, cond(gender==0, blmalinv, .))

	* Outcome variables
	label variable atblemp "At baseline employer"

	* Regressions
	* xl is 3.5 when 2011 is excluded and 4.5 otherwise
	local xl = cond(`no2011', 3.5, 4.5)
	if `no2011' {
		drop if year==2011
		drop interj1
	}

	//female; childbearing
	* estimation sample shared by the regression and the mean of the outcome
	local sample gender==1 & childbearing==1

	/*** Heterogeneity table ***/
	* Weighted regression of the outcome on treatpost, absorbing the worker id and
	* every variable whose name ends in FE; the result is stored as p10.
	local depvar atblemp
	local j = 1
	foreach var of local depvar {

		reghdfe `var' treatpost if `sample' [aweight=blfeminv], absorb(fakeid_worker *FE) cluster(clustergrp)
		estadd scalar obs = e(N_full)
		* 2014 mean of the outcome among treated workers in the same sample
		quietly summarize `var' if `sample' & treatwork==1 & year == 2014
		estadd scalar meandv = r(mean)
		estimates store p`j'0

		local ++j
	}

	* tab-delimited file holding the treatpost row plus the "mean depvar" and "N"
	* statistics
	forvalues i = 1/1 {
		estout p`i'0 using "$tables/Sincfemcb_p`i'.txt", ///
			style(tab) mlabels(none) label collabels(none) ///
			cells(b(star fmt(%9.3f)) se(par)) ///
			stats(meandv obs, fmt(%9.2fc %9.0fc ) labels("mean depvar" "N" )) ///
			drop(o.*, relax) keep(treatpost, relax) replace starlevels(* 0.10 ** 0.05 *** 0.01)
	}


************
** FIGURE **
************

* Re-import the four exported tables. For each file: drop the rows whose first
* column is empty or "N", strip the significance stars from the second column,
* convert it to numeric, name it after the outcome, and save it to a tempfile
* (`temp1' ... `temp4').
local tabfiles Sfirmenv_p1 Sfirmenv_p2 Sfirmenv_p3 Sincfemcb_p1
local outcomes sh_mgmt_fem_mgmt sh_mlext_mleav sh_mlprot_mleav atblemp
forvalues k = 1/4 {
	local tabfile : word `k' of `tabfiles'
	local outcome : word `k' of `outcomes'

	import delimited "$tables/`tabfile'.txt", clear
	drop if v1=="" | v1=="N"
	keep v1 v2
	replace v2 = subinstr(v2,"*","",.)
	destring v2, replace
	if "`outcome'"=="atblemp" {
		* NOTE(review): the exported mean of the dependent variable is overwritten
		* with a hard-coded 0.68 for this outcome - confirm where this value comes from
		replace v2 = 0.68 if v1=="mean depvar"
	}
	rename v2 `outcome'

	tempfile temp`k'
	save `temp`k''
}

* Combine the four outcome columns on the row label, then transpose so that each
* outcome becomes one observation.
use `temp1', clear
forvalues k = 2/4 {
	merge 1:1 v1 using `temp`k'', nogenerate
}
xpose, clear varname
* the transposed string column of row labels is all missing - drop it
drop if v1==.
* NOTE(review): presumably v1 holds the "mean depvar" row and v2 the treatpost
* coefficient (row order after the merge on v1) - verify against the exported files
gen effect = (v2/v1)*100

* bar order on the x-axis
gen order = .
local k = 0
foreach outcome in sh_mgmt_fem_mgmt sh_mlext_mleav sh_mlprot_mleav atblemp {
	local ++k
	replace order = `k' if _varname=="`outcome'"
}

* the "*" markers are placed at hard-coded coordinates
graph bar effect, ///
	over(order, relabel(1 `""Share women" "among managers""' 2 `""Take extended" "maternity leave""' 3 `""Return from" "maternity leave""' 4 `""Female retention" "age 20-35""')) ///
	bar(1, color(sourapple1*.8)) ///
	ytitle("Treatment effect (relative to baseline)", size(medlarge) height(5)) ///
	ylabel(0 "0%" 2 "2%" 4 "4%" 6 "6%" 8 "8%", labsize(medium)) ///
	text(2 11 "*", size(medium) placement(n)) ///
	text(7.8 37 "*", size(medium) placement(n)) ///
	text(1.2 89 "*", size(medium) placement(n)) ///
	graphregion(color(white))
graph export "$figures/spillovers.tif", replace

capture log close
